rm(list = ls(all.names = TRUE))
gc()
set.seed(211917)
options(scipen=999)

packages <-c("tidyverse","foreign","stargazer","naniar","xtable")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd("PUT YOUR WORKING DIRECTORY HERE")

# load data
dat <- read.dta("./data.dta")

#---------------#
# Cleaning Data #
#---------------#

# some preliminary cleaning
dat$q6_years_married[dat$q6_years_married == -9] <- 0
dat$treatment_binary <- ifelse(dat$holy_endorser==1 | dat$secular_endorser==1, 1, 0)
dat$q30_combined_s <- scale(dat$q30_combined)

## select all possible covariates and outcome at first
dat <- dat %>% 
  dplyr::select(q30_combined, q30_combined_s, treatment_binary, respondent_muslim, 
                respondent_male, enumerator_muslim, enumerator_male, q1_age, q2_male, 
                q3_kamba, q3_kikuyu, q3_luo, q3_somali, q3_other, q4_religion_christian, 
                q4_religion_muslim, q5_convert, q6_years_married, q6_married, q7_has_job, 
                q8_income_last_month, q8_has_income, q9_chance_become_rich, q10_plans_to_vote, 
                q11_govt_represents_interest, q12_feels_on_loosing_side, q13_a_knows_emigrant, 
                q13_b_knows_emigrant_som_eri, q15_witnesses_interrel_violence, 
                q16_witnesses_viol_musl_govt, q17_friend_family_died, q18_lost_job, 
                q18_arrested, q18_house_of_worship_raided, q18_stopped_speaking_parents, 
                q18_love_problems, q18_friend_left_country, q18_sum, q19_rlts_mother, 
                q19_not_raised_by_mom, q19_mother_dead, q20_rlts_father, q20_not_raised_by_father, 
                q20_father_dead, q21_respect_friends_family, q22_gender, q22_religion, q22_tribal, 
                q22_national, q22_youth, q23_religious_attendance, q23_frequents_house_worship, 
                q24_deontology, q25_payment_acceptable, q26_payment_ethical, q27_radicalization, 
                q33_distance)

## check for missings
vis_miss(dat)

## following variables seems problematic, i.e., shouldn't impute at this level
table(is.na(dat$q13_a_knows_emigrant))
table(is.na(dat$q19_rlts_mother))
table(is.na(dat$q20_rlts_father))
table(is.na(dat$q23_religious_attendance))

## remove problematic variables
dat <- dat %>% 
  dplyr::select(-c(q13_a_knows_emigrant,q19_rlts_mother,q20_rlts_father,q23_religious_attendance))

## check missing again:
vis_miss(dat)

## remove one missing treatment indicator (can't be imputed)
dat <- dat[which(dat$treatment_binary>-1),]

## impute rest of missings with mean 
for(i in 1:ncol(dat)){
  dat[is.na(dat[,i]), i] <- mean(dat[,i], na.rm = TRUE)
}
rm(i)

##remove missing cases
dat <- dat[complete.cases(dat), ]

#--------------------#
# declare covariates #
#--------------------#

dat$sample = 1
dat$respondent_christian <- (dat$respondent_muslim - 1)*-1
treats <- c("sample", "respondent_muslim", "respondent_christian")
vars <- c(colnames(dat[4:53]))

## remove some covariates
elements_2_remove <- c("q2_male", "q4_religion_christian", "q4_religion_muslim")
vars <- vars[!(vars %in% elements_2_remove)]

## helper function
get_stats <- function(var, treat, data) {
  c(mean(data[, var][data[, treat] ==1 ], na.rm = T), sd(data[, var][data[, treat] ==1 ], na.rm = T))
}

## loop
btable <- lapply(treats, function(y) t(sapply(vars, function(x) {
  get_stats(x, y, data = dat)
})))

## to dat, add n, colnames
btable <- data.frame(do.call(cbind, btable))

## times 100 for percentages
elements_2_remove = c("q1_age", "q6_years_married", "q8_income_last_month", "q9_chance_become_rich", "q11_govt_represents_interest", "q12_feels_on_loosing_side",  "q18_sum", "q21_respect_friends_family", "q23_frequents_house_worship", "q24_deontology", "q25_payment_acceptable", "q26_payment_ethical", "q27_radicalization", "q33_distance")
perc <- vars[!(vars %in% elements_2_remove)]
btable[rownames(btable) %in% perc, ] <- btable[rownames(btable) %in% perc, ] * 100

## add varnames
btable$variable <- rownames(btable)

## add n obs
n <- sapply(vars, function(x) sum(!is.na(dat[, x])))
btable <- cbind(n, btable)

## change colnames
cn <- paste0(rep(treats, each = 2), rep(c("_mean", "_sd"), 2))
colnames(btable)[c(-1, -ncol(btable))] <- cn
btable <- data.frame(round(btable[, -which(colnames(btable) == "variable")], 1))
rownames(btable) <- c("Muslim", "Male", "Enum Muslim", "Enum Male", "Age", "Tribe: Kamba", "Tribe: Kikuyu", "Tribe: Luo", "Tribe: Somali", "Tribe: Other", "Convert", "Years married", "Married", "Has job", "Income (KES)", "Any income", "Chance to become richt", "Plans to vote", "Govt represents interests", "Feels on losing side", "Knows Somali emigrant", "Witnessed interreligious violence", "Witnessed governmental violence against Muslims", "Friend or familie died", "Lost job", "Arrested", "House of worship raided", "Stopped speaking to parents", "Love problems", "Friend left country", "Sum of above", "Not raised by mother", "Mother dead", "Not raised by father", "Father dead", "Respect for family and friends", "Identity: gender", "Identity: religion", "Identity: tribal", "Identity: national", "Identity: youth", "Frequents house of worship", "Attitudes consequentialism", "Payment acceptable", "Payment ethical", "Radicalization scale", "Social distance toward Muslim or Christian")
btable

xtable(btable, digits=1)

